Import Libraries and Data¶

In [1]:
## code to display the plotly graphs in github
import plotly.io as pio
from IPython.display import HTML

pio.renderers.default = "notebook"
HTML('''
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
''')
Out[1]:
In [2]:
import os
from helper_funcs import get_tokens,get_headers,fetch_top_posts,fetch_last_posts,plotly_graphs
import pandas as pd 
import numpy as np
import ast
from sklearn.preprocessing import MultiLabelBinarizer
from xgboost import XGBClassifier
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
import plotly.graph_objects as go
In [3]:
PRODUCT_HUNT_API_KEY = os.getenv("PRODUCT_HUNT_API_KEY")
PRODUCT_HUNT_API_SECRET = os.getenv('PRODUCT_HUNT_API_SECRET')
In [4]:
token, token_type = get_tokens()
headers = get_headers(token,token_type)
In [5]:
start_date="2024-08-01T00:00:00Z"
end_date = "2024-08-31T23:59:59Z"
In [6]:
top_posts = fetch_top_posts(start_date=start_date,end_date=end_date,headers=headers, limit =100)
last_posts = fetch_last_posts(start_date=start_date,end_date=end_date,headers=headers, limit =100)
In [7]:
print(f"Name :{top_posts[1]['node']['name']},\nDescription:{top_posts[1]['node']['description']},\nFirst comment:{top_posts[1]['node']['comments']['nodes'][0]['body']}")
Name :Me.bot,
Description:Me.bot captures and connects your thoughts to understand you better, synthesizing a coach for all your life challenges, from a big career move to a small gloomy moment.,
First comment:Hello, I’m Felix Tao, the CEO of Mindverse. Let me introduce <b>Me.bot</b> to you. It is an app I use everyday, for several hours!

Surrounded by centralized AI models, we believe in exploring a path where everyone can <b>train their own personal AI</b>. Everyone deserves an AI defined by them, not by a "Big Brother."

<b>Our solution</b>
Our product, Me.bot, is designed to be a <b>personalized AI companion</b>. It learns and evolves with you, coaching you based on your unique experiences and interactions. You can easily build your memory archive with Me.bot all-compassing multimodal recognition, and Me.bot will connect the dots of your memories to inspire and support you.

<b>Key features</b>
🌟<b>Serendipity</b>: Me.bot learns from you, offering inspiration and advice when you need it most.
🧠<b>Second Brain</b>: Me.bot helps you understand yourself better and presents its insights into you.
💬<b>Speak to Remind</b>: Set reminders with your voice—it's easier than ever.
📁<b>Smart Topics</b>: Automatically organized AI folders keep your archives tidy and easy to navigate.

We're already seeing Me.bot make a difference. Some users have told us that Me.bot suggested a LinkedIn connection that led to a <b>job opportunity</b>. Others shared that, thanks to Me.bot, they decided to pursue a degree in sociology at the age of 35.

Looking ahead, we plan to introduce features like shared memories and a bot community where your personal Me.bot can interact with others.

<b>Finally, kudos to @chrismessina who's supported our launch once again!</b>

By the way, sign up today to enjoy a <b>30-day reward</b> by completing your new user task!
In [9]:
print(f"Name :{last_posts[1]['node']['name']},\nDescription:{last_posts[1]['node']['description']},\nFirst comment:{last_posts[1]['node']['comments']['nodes'][0]['body']}")
Name :Business Digitaly,
Description:Elevate your online presence with expert SEO, Google Ads, web development, and marketing services. Maximize ROI with BusinessDigitaly today!,
First comment:Excited to announce the launch of BusinessDigitaly on Product Hunt! 🎉 As one of the top digital marketing agencies in the USA, we're dedicated to helping businesses grow through innovative digital strategies. 🚀 Whether you're looking for the best digital marketing company in the USA or need expert guidance on your digital journey, we've got you covered. Check out our product, and let's get the conversation started! We’d love to hear your thoughts and answer any questions you might have.

EDA - Top Posts of August¶

In [10]:
top_posts_df = pd.DataFrame([top_posts[i]['node'] for i in range(len(top_posts))])
top_posts_df.head()
Out[10]:
name description url votesCount createdAt tagline commentsCount comments topics
0 Wordware (YC S24) Wordware is an IDE that enables anyone to buil... https://www.producthunt.com/posts/wordware-yc-... 7465 2024-08-02T07:01:00Z Your tool for building AI agents with natural ... 165 {'nodes': [{'body': '👋🏻 Hi Product Hunt makers... {'nodes': [{'slug': 'software-engineering'}, {...
1 Me.bot Me.bot captures and connects your thoughts to ... https://www.producthunt.com/posts/me-bot-2?utm... 2664 2024-08-06T07:01:00Z The inspiring companion for your life 418 {'nodes': [{'body': 'Hello, I’m Felix Tao, the... {'nodes': [{'slug': 'productivity'}, {'slug': ...
2 10xlaunch Just add our one-line script to your website a... https://www.producthunt.com/posts/10xlaunch?ut... 1573 2024-08-27T07:01:00Z Get 10x more users from same website traffic 205 {'nodes': [{'body': 'Hey fam, Mo here from 10... {'nodes': [{'slug': 'sales'}, {'slug': 'artifi...
3 Flowith Flowith is the AI for deep work. Surpassing tr... https://www.producthunt.com/posts/flowith?utm_... 1476 2024-08-07T07:01:00Z AI for deep work 108 {'nodes': [{'body': 'Hey Product Hunt communit... {'nodes': [{'slug': 'productivity'}, {'slug': ...
4 Brainybear.ai Build AI Chatbots in 3 Steps and Train in 3 Cl... https://www.producthunt.com/posts/brainybear-a... 1370 2024-08-06T07:01:00Z Train AI chatbots in 3 clicks and help custome... 105 {'nodes': [{'body': 'Hey Hunters, I'm thrille... {'nodes': [{'slug': 'messaging'}, {'slug': 'ar...
In [11]:
top_posts_df['date'] = pd.to_datetime(top_posts_df['createdAt']).dt.date
top_posts_df['day'] = pd.to_datetime(top_posts_df['createdAt']).dt.day_name()
top_posts_df['topic_list'] = top_posts_df['topics'].apply(lambda x: [j['slug'] for j in x['nodes']])
In [12]:
plotly_graphs('histogram',top_posts_df['day'],title = 'Day of Launch')

Of the top performing posts of August, the common trend is to launch on Weekdays.

In [13]:
plotly_graphs('scatter',x=top_posts_df['day'],y=top_posts_df['votesCount'],
              title='Vote Count compared to day of Launch')
In [14]:
plotly_graphs('box',x=top_posts_df['day'],y=top_posts_df['votesCount'],
              title='Vote Count compared to day of Launch')
In [15]:
plotly_graphs('bar', 
              x=top_posts_df.groupby('day')['votesCount'].mean().sort_values(ascending=False).round(0).index,
              y=top_posts_df.groupby('day')['votesCount'].mean().sort_values(ascending=False).round(0),
              title='Average Vote Count vs Day of Launch')
In [16]:
top_posts_df.groupby('day')['votesCount'].mean().round(0).sort_values(ascending=False)
Out[16]:
day
Friday       1191.0
Tuesday       877.0
Wednesday     778.0
Monday        734.0
Thursday      691.0
Saturday      640.0
Sunday        590.0
Name: votesCount, dtype: float64
In [17]:
top_posts_df[top_posts_df['votesCount']<7000].groupby('day')['votesCount'].mean().round(0).sort_values(ascending=False)
Out[17]:
day
Tuesday      877.0
Wednesday    778.0
Monday       734.0
Thursday     691.0
Friday       668.0
Saturday     640.0
Sunday       590.0
Name: votesCount, dtype: float64
In [18]:
top_posts_df.groupby('day')['commentsCount'].mean().round(0).sort_values(ascending=False)
Out[18]:
day
Tuesday      187.0
Wednesday    174.0
Monday       172.0
Thursday     172.0
Friday       156.0
Saturday     141.0
Sunday       126.0
Name: commentsCount, dtype: float64
In [19]:
plotly_graphs('bar', 
              x=top_posts_df.groupby('day')['commentsCount'].mean().sort_values(ascending=False).round(0).index,
              y=top_posts_df.groupby('day')['commentsCount'].mean().sort_values(ascending=False).round(0),
              title='Average comments Count vs Day of Launch')

Few trends observed:

  • The average votes gained seems to be high on the weekdays rather than on Saturdays and Sundays.
  • Also it's worth noting that launch of one startup heavily skewed the average votes data

One of the reasons behind this trend may be that people like to enjoy their weekends and are not that active on Product Hunt.

Also typically, from observation, the launches that do well in the early hours or on the first day of the launch tend to do well throughout. One way to test if this observation holds true is to check the correlation between the votes gained within 24 hours to launch vs total votes gained. But this is not possible as the Product Hunt's GraphQL API only provides total votes at the moment

In [20]:
plotly_graphs('bar',
              x=top_posts_df.explode('topic_list')['topic_list'].value_counts()[:10].index,
              y=top_posts_df.explode('topic_list')['topic_list'].value_counts()[:10],
              title = 'Topics of Top 100 launches of August'
       )

As expected most (64 of 100) of the top launches in August are related to the AI domain

I have been following product Hunt for a while and one thing that stands out in top performing posts almost always has been the first comment from the maker.

EDA II¶

Let us take a look at the difference between launches that are voted the most and voted the least in the month of August

Here are a few things we can take a look at:

  • The description of the product/service
  • First comment by the maker: which provides a detailed overview of how the product works and other details

We can use the description to train a classification model to determine whether a launch will be successful based on the number of votes it gets

Although product matters the most when it comes to the votes it gets but how the product is marketed also matters

In [21]:
for i in range(len(top_posts_df[:5])):
    print(top_posts_df.loc[i,'description'])
    print()
Wordware is an IDE that enables anyone to build complex AI Agents and applications. Domain experts and engineers can now iterate 20x faster with prebuilt tools, API deployment, tracing, and more. Finally, build high-quality and reliable AI!

Me.bot captures and connects your thoughts to understand you better, synthesizing a coach for all your life challenges, from a big career move to a small gloomy moment.

Just add our one-line script to your website and know exactly who's visiting your website – get names, emails, and LinkedIn profiles of your anonymous website visitors automatically as soon as they land on your website.

Flowith is the AI for deep work. Surpassing traditional chat-based tools, it streamlines tasks on a multi-thread interface powered by a most advanced agent framework. The intuitive canvas and smart framework boost productivity, helping users stay in the flow.

Build AI Chatbots in 3 Steps and Train in 3 Clicks. Brainybear scans your website or uploaded files to deliver quick, accurate AI answers to customer queries.

In [22]:
last_posts_df = pd.DataFrame([last_posts[i]['node'] for i in range(len(top_posts))])
last_posts_df['date'] = pd.to_datetime(last_posts_df['createdAt']).dt.date
last_posts_df['day'] = pd.to_datetime(last_posts_df['createdAt']).dt.day_name()
last_posts_df['topic_list'] = last_posts_df['topics'].apply(lambda x: [j['slug'] for j in x['nodes']])
last_posts_df.head()
Out[22]:
name description url votesCount createdAt tagline commentsCount comments topics date day topic_list
0 MPS - Major Professional Services Major Professional Services (MPS) offers fixed... https://www.producthunt.com/posts/mps-major-pr... 1 2024-08-06T17:57:11Z Unlock Your Financial Potential 0 {'nodes': []} {'nodes': [{'slug': 'fintech'}, {'slug': 'inve... 2024-08-06 Tuesday [fintech, investing, money]
1 Business Digitaly Elevate your online presence with expert SEO, ... https://www.producthunt.com/posts/business-dig... 1 2024-08-06T18:01:00Z Digital Marketing Agency in USA 1 {'nodes': [{'body': 'Excited to announce the l... {'nodes': [{'slug': 'marketing'}, {'slug': 'se... 2024-08-06 Tuesday [marketing, seo, web-design]
2 Modern Brick Haus Discover Bonsai Tree DIY brick build kits at M... https://www.producthunt.com/posts/modern-brick... 1 2024-08-07T13:07:55Z Discover The Bonsai Tree - Build Kits with Mod... 0 {'nodes': []} {'nodes': [{'slug': 'home'}, {'slug': 'craftin... 2024-08-07 Wednesday [home, crafting, diy]
3 Tech Leads IT Are you seeking comprehensive Oracle Fusion SC... https://www.producthunt.com/posts/tech-leads-i... 1 2024-08-06T05:58:46Z Oracle Fusion SCM Online Training 1 {'nodes': [{'body': 'Are you seeking comprehen... {'nodes': [{'slug': 'education'}, {'slug': 'on... 2024-08-06 Tuesday [education, online-learning, career]
4 MainStreet E-commerce Highly customizable e-commerce software for bu... https://www.producthunt.com/posts/mainstreet-e... 1 2024-08-06T18:16:47Z MainStreet 1 {'nodes': [{'body': 'Feature rich out of the b... {'nodes': [{'slug': 'saas'}, {'slug': 'e-comme... 2024-08-06 Tuesday [saas, e-commerce, business]
In [23]:
for i in range(len(last_posts_df[:5])):
    print(last_posts_df.loc[i,'description'])
    print()
Major Professional Services (MPS) offers fixed income investing, bonds, private debt investments, and alternative fixed income products for institutional, wholesale, and retail investors.

Elevate your online presence with expert SEO, Google Ads, web development, and marketing services. Maximize ROI with BusinessDigitaly today!

Discover Bonsai Tree DIY brick build kits at Modern Brick Haus. You can easily create a miniature masterpiece to add greenery to your modern home. Buy Now!

Are you seeking comprehensive Oracle Fusion SCM Online Training to elevate your expertise in supply chain management? Look no further than Tech Leads IT, a leading institute renowned for its top-notch training programs. Enroll now

Highly customizable e-commerce software for businesses of medium and large sizes that your operation team will actually love.

In [24]:
top_posts_df.iloc[0]['comments']['nodes'][0]['body']
Out[24]:
'👋🏻 Hi Product Hunt makers!\n\nI’m Kamil, Head of Growth at Wordware—an IDE for building AI agents. Today, we’re officially launching the Wordware platform, and we’re excited to show the world what we built.\n\nIt’s a tool (an IDE) that enables you to quickly build custom AI agents for specific use cases like legal contract generation, marketing content automation, invoice analysis, candidate screening, generating PRDs, and many more. We call applications built on Wordware ‘WordApps’ because you can create them using natural language—in other words, using words (pun intended).\n\nOur core belief is that the domain expert—not the engineer—knows what good LLM output looks like. For example, lawyers building legal SaaS need to be deeply involved in the process, and working directly in the codebase or going back-and-forth with engineers isn’t the way to go.\n\nMost of our clients are cross-functional teams, including less technical members, who need to collaborate with engineers on LLM applications, such as assessing prompt outputs, and care about the speed of iterations. \n\nThese include early-stage startups building AI-first solutions that treat Wordware as their LLM backend, larger corporations that build 100s of prototypes on us, and AI builders creating their own products. From venture partners aiming to be in the top 0.1% of their field by building AI agents to analyze startups and founders, to lawyers scaling their expertise through AI, Wordware is the go-to tool.\n\nAnd if you’re technical, then you’ll appreciate Wordware for the speed of building complex AI agents without messy LLM abstractions, as well as our advanced capabilities like loops, conditional logic (IF-Else), structured generation (JSON mode), and custom code execution, allowing you to connect to virtually any API.\n\nNote: you must not be afraid of no/low-code tools and simply accept it’s 10x faster than writing everything yourself—not to mention the annoying process of iterations on prompts in the codebase 😉\n\nHere’s what you can do with Wordware - example use cases:\n\n✍🏻 Custom content generation AI agent: Researches topics and writes SEO-optimized blog posts using online-enabled models and looping through table of contents. It helped created fully automated content execution for many teams.\n🧠 Invoice processing: Analyzes and processes unstructured invoices efficiently (up to 25k daily by one of our clients!), categorizing them and giving insights on financial data. Not only that, based on the data on the invoices it gives personalized recommendations on where to find cheaper alternatives based on geolocalization.\n📊 Data querying and reporting: Performs data analysis through natural language commands, queries databases and constructs personalized reports. Users don’t have to be SQL fluent to get information, it can query big databases and save over 10h of work per week.\n📚 Personalized learning and assessment agent: Prepares learning material personalized to the user’s level and needs by searching Wikipedia or research papers, then prepares quizzes and assesses user answers automatically.\n✨ Sales enrichment: Searches LinkedIn and the web for information on leads, then updates and enriches your CRM with relevant data. Using different sources we can make sure the provided data is relevant and not hallucinated, making personalized outbound more effective.\n💬 Meeting summaries: Provides personal voice summaries of all your weekly meetings in a structured manner. Not only that, it also categorizes them by meeting type so you never get lost in the notes.\n\nPS: Wordware is the platform behind Audioscribe (<a href="https://audioscribe.wordware.ai" target="_blank" rel="nofollow noopener noreferrer">https://audioscribe.wordware.ai</a>) and Twitter Personality (<a href="https://twitter.wordware.ai" target="_blank" rel="nofollow noopener noreferrer">https://twitter.wordware.ai</a>) projects, our open-source projects that we built to showcase the possibilities of Wordware.\n\nPS2: our Twitter Personality app gained 1 million users in the last 4 days - it’s a great showcase of what prompting and building an LLM backend on Wordware can achieve.'
In [25]:
print(top_posts_df.iloc[0]['comments']['nodes'][0]['body'])
👋🏻 Hi Product Hunt makers!

I’m Kamil, Head of Growth at Wordware—an IDE for building AI agents. Today, we’re officially launching the Wordware platform, and we’re excited to show the world what we built.

It’s a tool (an IDE) that enables you to quickly build custom AI agents for specific use cases like legal contract generation, marketing content automation, invoice analysis, candidate screening, generating PRDs, and many more. We call applications built on Wordware ‘WordApps’ because you can create them using natural language—in other words, using words (pun intended).

Our core belief is that the domain expert—not the engineer—knows what good LLM output looks like. For example, lawyers building legal SaaS need to be deeply involved in the process, and working directly in the codebase or going back-and-forth with engineers isn’t the way to go.

Most of our clients are cross-functional teams, including less technical members, who need to collaborate with engineers on LLM applications, such as assessing prompt outputs, and care about the speed of iterations. 

These include early-stage startups building AI-first solutions that treat Wordware as their LLM backend, larger corporations that build 100s of prototypes on us, and AI builders creating their own products. From venture partners aiming to be in the top 0.1% of their field by building AI agents to analyze startups and founders, to lawyers scaling their expertise through AI, Wordware is the go-to tool.

And if you’re technical, then you’ll appreciate Wordware for the speed of building complex AI agents without messy LLM abstractions, as well as our advanced capabilities like loops, conditional logic (IF-Else), structured generation (JSON mode), and custom code execution, allowing you to connect to virtually any API.

Note: you must not be afraid of no/low-code tools and simply accept it’s 10x faster than writing everything yourself—not to mention the annoying process of iterations on prompts in the codebase 😉

Here’s what you can do with Wordware - example use cases:

✍🏻 Custom content generation AI agent: Researches topics and writes SEO-optimized blog posts using online-enabled models and looping through table of contents. It helped created fully automated content execution for many teams.
🧠 Invoice processing: Analyzes and processes unstructured invoices efficiently (up to 25k daily by one of our clients!), categorizing them and giving insights on financial data. Not only that, based on the data on the invoices it gives personalized recommendations on where to find cheaper alternatives based on geolocalization.
📊 Data querying and reporting: Performs data analysis through natural language commands, queries databases and constructs personalized reports. Users don’t have to be SQL fluent to get information, it can query big databases and save over 10h of work per week.
📚 Personalized learning and assessment agent: Prepares learning material personalized to the user’s level and needs by searching Wikipedia or research papers, then prepares quizzes and assesses user answers automatically.
✨ Sales enrichment: Searches LinkedIn and the web for information on leads, then updates and enriches your CRM with relevant data. Using different sources we can make sure the provided data is relevant and not hallucinated, making personalized outbound more effective.
💬 Meeting summaries: Provides personal voice summaries of all your weekly meetings in a structured manner. Not only that, it also categorizes them by meeting type so you never get lost in the notes.

PS: Wordware is the platform behind Audioscribe (<a href="https://audioscribe.wordware.ai" target="_blank" rel="nofollow noopener noreferrer">https://audioscribe.wordware.ai</a>) and Twitter Personality (<a href="https://twitter.wordware.ai" target="_blank" rel="nofollow noopener noreferrer">https://twitter.wordware.ai</a>) projects, our open-source projects that we built to showcase the possibilities of Wordware.

PS2: our Twitter Personality app gained 1 million users in the last 4 days - it’s a great showcase of what prompting and building an LLM backend on Wordware can achieve.
In [26]:
print(top_posts_df.iloc[1]['comments']['nodes'][0]['body'])
Hello, I’m Felix Tao, the CEO of Mindverse. Let me introduce <b>Me.bot</b> to you. It is an app I use everyday, for several hours!

Surrounded by centralized AI models, we believe in exploring a path where everyone can <b>train their own personal AI</b>. Everyone deserves an AI defined by them, not by a "Big Brother."

<b>Our solution</b>
Our product, Me.bot, is designed to be a <b>personalized AI companion</b>. It learns and evolves with you, coaching you based on your unique experiences and interactions. You can easily build your memory archive with Me.bot all-compassing multimodal recognition, and Me.bot will connect the dots of your memories to inspire and support you.

<b>Key features</b>
🌟<b>Serendipity</b>: Me.bot learns from you, offering inspiration and advice when you need it most.
🧠<b>Second Brain</b>: Me.bot helps you understand yourself better and presents its insights into you.
💬<b>Speak to Remind</b>: Set reminders with your voice—it's easier than ever.
📁<b>Smart Topics</b>: Automatically organized AI folders keep your archives tidy and easy to navigate.

We're already seeing Me.bot make a difference. Some users have told us that Me.bot suggested a LinkedIn connection that led to a <b>job opportunity</b>. Others shared that, thanks to Me.bot, they decided to pursue a degree in sociology at the age of 35.

Looking ahead, we plan to introduce features like shared memories and a bot community where your personal Me.bot can interact with others.

<b>Finally, kudos to @chrismessina who's supported our launch once again!</b>

By the way, sign up today to enjoy a <b>30-day reward</b> by completing your new user task!
In [27]:
[last_posts_df.iloc[12]['comments']['nodes'][0]['body'] if(last_posts_df.iloc[12]['comments']['nodes']) else None]
Out[27]:
[None]
In [28]:
top_first_comments_len = [len(top_posts[i]['node']['comments']['nodes'][0]['body']) 
                          for i in range(len(top_posts))]

last_first_comments_len = [len(last_posts[i]['node']['comments']['nodes'][0]['body']) 
                           if last_posts[i]['node']['comments']['nodes'] else 0
                           for i in range(len(last_posts))]
In [29]:
plotly_graphs('histogram',x=top_first_comments_len)
In [30]:
np.array(top_first_comments_len).mean()
Out[30]:
1595.18
In [31]:
plotly_graphs('histogram',x=last_first_comments_len)

As we can see from couple of example from both types of posts, the first comment from the maker is very detailed in top performing launches whereas the first comments in case of the launches with least amount of votes the comment is not very detailed and sometimes there is no comment from the maker

Classification Model¶

In [32]:
df = pd.read_csv('PH_posts_data.csv')
df.head()
Out[32]:
Unnamed: 0 name description url votesCount createdAt tagline commentsCount comments topics
0 0 Not Diamond Not Diamond isn’t like other chatbots you’ve u... https://www.producthunt.com/posts/not-diamond?... 691 2024-08-01T07:01:00Z The last chatbot you’ll ever need 219 {'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n... {'nodes': [{'slug': 'developer-tools'}, {'slug...
1 1 Clarity Clarity is purpose-built for founder-led sales... https://www.producthunt.com/posts/clarity-9f37... 545 2024-08-01T07:01:00Z A meeting recorder for founder-led sales 199 {'nodes': [{'body': "Congrats on the launch Au... {'nodes': [{'slug': 'productivity'}, {'slug': ...
2 2 Mito Health Mito Health uses blood work at regular labs to... https://www.producthunt.com/posts/mito-health?... 358 2024-08-01T07:01:00Z Better insights from bloodwork 117 {'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe... {'nodes': [{'slug': 'health-fitness'}, {'slug'...
3 3 EduWiz.AI Improve your writing effortlessly with EduWiz.... https://www.producthunt.com/posts/eduwiz-ai?ut... 292 2024-08-01T07:01:00Z Write magical paperwork in seconds with AI 105 {'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI... {'nodes': [{'slug': 'writing'}, {'slug': 'educ...
4 4 Mind Visuals Stay in your creator zone and edit videos in s... https://www.producthunt.com/posts/mind-visuals... 278 2024-08-01T07:01:00Z Drag and drop animations for creators 61 {'nodes': [{'body': 'Mind Visuals is now live!... {'nodes': [{'slug': 'design-tools'}, {'slug': ...
In [33]:
df.drop('Unnamed: 0',axis=1,inplace=True)
df['Date'] = pd.to_datetime(df['createdAt']).dt.date
# df['date'] = pd.to_datetime(df['createdAt']).dt.date
df['day'] = pd.to_datetime(df['createdAt']).dt.day_name()
df['topic_list'] = df['topics'].apply(lambda x: [j['slug'] for j in ast.literal_eval(x)['nodes']])
df.head()
Out[33]:
name description url votesCount createdAt tagline commentsCount comments topics Date day topic_list
0 Not Diamond Not Diamond isn’t like other chatbots you’ve u... https://www.producthunt.com/posts/not-diamond?... 691 2024-08-01T07:01:00Z The last chatbot you’ll ever need 219 {'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n... {'nodes': [{'slug': 'developer-tools'}, {'slug... 2024-08-01 Thursday [developer-tools, artificial-intelligence, bots]
1 Clarity Clarity is purpose-built for founder-led sales... https://www.producthunt.com/posts/clarity-9f37... 545 2024-08-01T07:01:00Z A meeting recorder for founder-led sales 199 {'nodes': [{'body': "Congrats on the launch Au... {'nodes': [{'slug': 'productivity'}, {'slug': ... 2024-08-01 Thursday [productivity, sales, artificial-intelligence]
2 Mito Health Mito Health uses blood work at regular labs to... https://www.producthunt.com/posts/mito-health?... 358 2024-08-01T07:01:00Z Better insights from bloodwork 117 {'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe... {'nodes': [{'slug': 'health-fitness'}, {'slug'... 2024-08-01 Thursday [health-fitness, artificial-intelligence, life...
3 EduWiz.AI Improve your writing effortlessly with EduWiz.... https://www.producthunt.com/posts/eduwiz-ai?ut... 292 2024-08-01T07:01:00Z Write magical paperwork in seconds with AI 105 {'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI... {'nodes': [{'slug': 'writing'}, {'slug': 'educ... 2024-08-01 Thursday [writing, education, artificial-intelligence]
4 Mind Visuals Stay in your creator zone and edit videos in s... https://www.producthunt.com/posts/mind-visuals... 278 2024-08-01T07:01:00Z Drag and drop animations for creators 61 {'nodes': [{'body': 'Mind Visuals is now live!... {'nodes': [{'slug': 'design-tools'}, {'slug': ... 2024-08-01 Thursday [design-tools, marketing, video]

Generally anything above 200 upvotes on Product Hunt has a chance of becoming product of the day and 200-300 upvotes is considered a good lauch.

In [34]:
df[df['votesCount']>200].shape
Out[34]:
(239, 12)
In [35]:
def categorize_votes(vote):
    if vote > 150:
        return 'Success'
    elif vote > 50:
        return 'Average'
    else:
        return 'low'

df['vote_category'] = df['votesCount'].apply(categorize_votes)
In [36]:
first_comments=[ast.literal_eval(df['comments'][i])['nodes'][0]['body'] 
 if len(ast.literal_eval(df['comments'][i])['nodes']) else None
 for i in range(len(df))]
In [37]:
df['first_comments'] = first_comments
df['first_comments_len']=[len(first_comments[i]) if first_comments[i] else 0 
                          for i in range(len(df['first_comments']))]
In [38]:
ast.literal_eval(df['topics'][0])['nodes']
Out[38]:
[{'slug': 'developer-tools'},
 {'slug': 'artificial-intelligence'},
 {'slug': 'bots'}]
In [39]:
df[['votesCount','commentsCount','first_comments_len']].corr()
Out[39]:
votesCount commentsCount first_comments_len
votesCount 1.000000 0.701128 0.246992
commentsCount 0.701128 1.000000 0.298725
first_comments_len 0.246992 0.298725 1.000000
In [40]:
f'Number of topics: {len(list(sorted(set(df['topic_list'].explode().values))))}'
Out[40]:
'Number of topics: 295'
In [41]:
mlb = MultiLabelBinarizer()
topics_one_hot = mlb.fit_transform(df['topic_list'])
In [42]:
plotly_graphs('histogram',
              x=[len(top_posts_df['description'][i]) for i in range(len(top_posts_df))],
              title='Length of description of Top Posts')
In [43]:
plotly_graphs('histogram',
              x=[len(last_posts_df['description'][i]) for i in range(len(top_posts_df))],
              title='Length of description of Top Posts')
In [44]:
topics_one_hot_df = pd.DataFrame(topics_one_hot, columns=mlb.classes_)
df = pd.concat([df, topics_one_hot_df], axis=1)
df.head()
Out[44]:
name description url votesCount createdAt tagline commentsCount comments topics Date ... weather web-app web-design web3 website-builder wi-fi word-games wordpress writing youtube
0 Not Diamond Not Diamond isn’t like other chatbots you’ve u... https://www.producthunt.com/posts/not-diamond?... 691 2024-08-01T07:01:00Z The last chatbot you’ll ever need 219 {'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n... {'nodes': [{'slug': 'developer-tools'}, {'slug... 2024-08-01 ... 0 0 0 0 0 0 0 0 0 0
1 Clarity Clarity is purpose-built for founder-led sales... https://www.producthunt.com/posts/clarity-9f37... 545 2024-08-01T07:01:00Z A meeting recorder for founder-led sales 199 {'nodes': [{'body': "Congrats on the launch Au... {'nodes': [{'slug': 'productivity'}, {'slug': ... 2024-08-01 ... 0 0 0 0 0 0 0 0 0 0
2 Mito Health Mito Health uses blood work at regular labs to... https://www.producthunt.com/posts/mito-health?... 358 2024-08-01T07:01:00Z Better insights from bloodwork 117 {'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe... {'nodes': [{'slug': 'health-fitness'}, {'slug'... 2024-08-01 ... 0 0 0 0 0 0 0 0 0 0
3 EduWiz.AI Improve your writing effortlessly with EduWiz.... https://www.producthunt.com/posts/eduwiz-ai?ut... 292 2024-08-01T07:01:00Z Write magical paperwork in seconds with AI 105 {'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI... {'nodes': [{'slug': 'writing'}, {'slug': 'educ... 2024-08-01 ... 0 0 0 0 0 0 0 0 1 0
4 Mind Visuals Stay in your creator zone and edit videos in s... https://www.producthunt.com/posts/mind-visuals... 278 2024-08-01T07:01:00Z Drag and drop animations for creators 61 {'nodes': [{'body': 'Mind Visuals is now live!... {'nodes': [{'slug': 'design-tools'}, {'slug': ... 2024-08-01 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 310 columns

In [45]:
daytoint = {'Monday': 1,
 'Tuesday': 2,
 'Wednesday': 3,
 'Thursday': 4,
 'Friday': 5,
 'Saturday': 6,
 'Sunday': 7,
}

vote_category_dict ={'low':0,
    'Average':1,
 'Success':2}
In [46]:
df['daytoint']= df['day'].map(daytoint)
df['description_len']= df['description'].apply(lambda i: len(i))
In [47]:
x= df.drop(list(df.dtypes[df.dtypes==object].index)+['votesCount','commentsCount'],axis=1)
y = df['vote_category']
x.shape,y.shape
Out[47]:
((1860, 298), (1860,))
In [48]:
x_train,x_test,y_train,y_test = train_test_split(x,y.map(vote_category_dict),test_size = 0.2)
x_train.shape,y_train.shape,x_test.shape,y_test.shape
Out[48]:
((1488, 298), (1488,), (372, 298), (372,))
In [49]:
y.map(vote_category_dict).value_counts()
Out[49]:
vote_category
0    1060
1     458
2     342
Name: count, dtype: int64
In [50]:
xgb= XGBClassifier(max_depth =3,max_leaves =4,learning_rate =0.1)
xgb.fit(x_train,y_train)
Out[50]:
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=4,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, objective='multi:softprob', ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=4,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=None, n_jobs=None,
              num_parallel_tree=None, objective='multi:softprob', ...)
In [51]:
print(f'Training Accuracy: {accuracy_score(xgb.predict(x_train),y_train)}')
print(f'Training Confusion matrix: {confusion_matrix(xgb.predict(x_train),y_train)}')
Training Accuracy: 0.6458333333333334
Training Confusion matrix: [[802 262 178]
 [ 18  84  19]
 [ 24  26  75]]
In [52]:
print(f'Test Accuracy: {accuracy_score(xgb.predict(x_test),y_test)}')
print(f'Test Confusion matrix: {confusion_matrix(xgb.predict(x_test),y_test)}')
Test Accuracy: 0.5860215053763441
Test Confusion matrix: [[203  76  57]
 [  7   5   3]
 [  6   5  10]]
In [53]:
print(classification_report(xgb.predict(x_test),y_test))
              precision    recall  f1-score   support

           0       0.94      0.60      0.74       336
           1       0.06      0.33      0.10        15
           2       0.14      0.48      0.22        21

    accuracy                           0.59       372
   macro avg       0.38      0.47      0.35       372
weighted avg       0.86      0.59      0.68       372